# ------------------------------------------------------------------------------
# Replication Materials
# 
# title: Eliciting Beliefs as Distributions in Online Surveys
# journal: Political Analysis
# authors: Lucas Leemann, Richard Traunmüller, and Lukas Stoetzer
# date: August 2020
# ------------------------------------------------------------------------------





# Elicited Beliefs Functions
#################################

# Log-gamma =========

  KL_dis_beta <- function(a,b){

    # Taken from
    # https://github.com/cran/Compositional/blob/master/R/kl.diri.R

    a0 <- sum(a)
    b0 <- sum(b)
    f <- sum( (a - b) * ( digamma(a) - digamma(a0) ) ) + sum( lgamma(b) - lgamma(a) ) + lgamma(a0) - lgamma(b0)

    return(f)

  }


# Est_Eb Quantile Question =======

  est_eB_beta <- function(quart.mat, true.val=c(60,60)){

    # Estimate elicited Beliefs with a beta distribution
    # from a matrix of responses (q25,q50,q75)
    # normal model for measurment
    # q25 ~ N(mu_1,sigma); q50 ~ N(mu_2,sigma);  q75 ~ N(mu_3,sigma);
    # mu_1 = Q(0.25, alpha, beta), mu_2 = Q(0.5, alpha, beta), mu_3 = Q(0.75, alpha, beta)

    # v is a vector of size 3 containing lower quartile, median, and upper quartile
    if(ncol(quart.mat)!=3) stop('quartile measures (in columns) needs to be of length 3')

    # Expected value from Beta density for one Respondent
    mu <- function(shape){

      # Expected quantile values
      Eq <- c(qbeta(0.25,shape[1],shape[2])
             ,qbeta(0.5 ,shape[1],shape[2])
             ,qbeta(0.75,shape[1],shape[2]))

      # Return
      return(Eq)

    }

    # Log-liklihood Function for respondent
    log_lik_q <- function(theta,
                          null=FALSE,tv=true.val,
                          q=quart.mat,
                          belief_fun=mu){

      # Paramters
      if(null){ # Null Model only estimates error variance with true paramters
        alpha <- tv[1]  # True alpha shape paramter of beta distributed data
        beta  <- tv[2]  # True beta shape paramter of beta distributed data
        sigma <- exp(theta[1]) # Meaurment error standard eviation
      } else { # Model for estimating paramters of beta
        alpha <- exp(theta[1]) # Positive alpha shape paramter of beta belief
        beta  <- exp(theta[2]) # Positive beta shape paramter of beta belief
        sigma <- exp(theta[3]) # Meaurment error standard eviation
      }

      # Expected Responses
        Eq <- belief_fun(c(alpha,beta))

      # Log-Likihood
        log_lik_i <- apply(q,1,function(dat){
                              dnorm(dat[1], Eq[1],sigma,log=T) +
                              dnorm(dat[2], Eq[2],sigma,log=T) +
                              dnorm(dat[3], Eq[3],sigma,log=T)
                              })
      # Return
        return(sum(log_lik_i))

    }

    # Maximize Log-Liklihood for full model
        res <- optim(par=c(log(1),log(1),log(0.1)),
                     fn=log_lik_q,
                     hessian = T, control=list(fnscale=-1),
                     lower=c(0,0,-10),upper=c(10,10,5),
                     method = "L-BFGS-B")

    # Maximize Log-Liklihood for null model
        res_null <- optim(par=c(log(0.1)),lower = -30,upper=30,
                          fn=log_lik_q, null=TRUE,
                          hessian = T,control=list(fnscale=-1),
                          method = "Brent")

    # LR-Test
        LR_asym <- 2*(res$value - res_null$value)
        lr <- pchisq(LR_asym,2,lower.tail = F)

    # Return Parameter estimates as vector
        names(res$par) <- c("alpha","beta","sigma")
        return(list("par"= exp(res$par),
                    "log_lik"= res$value,
                    "log_lik_null"=res_null$value,
                    "lr"=lr,
                    "n" = nrow(quart.mat)
                    )
        )

    }


  est_eB_beta_i <- function(quart.mat, sigma = 0.09){

    # Estimate elicited Beliefs with a beta distribution

    # v is a vector of size 3 containing lower quartile, median, and upper quartile
    if(ncol(quart.mat)!=3) stop('quartile measures (in columns) needs to be of length 3')

    # Information
    N <- nrow(quart.mat)
    K <- 3
    qk <- c(0.25,0.5, 0.75)

    # Estimate sigma
    est_sigma <- function(Y,shapes){
      Qbeta_i <- t(apply(shapes[sel,],1,function(s) qbeta(qk,s[1],s[2])))
      sqrt(sum((Y[sel,] - Qbeta_i)^2)/(N*K))
    }

    # log-Liklihood
    log.lik.i <- function(theta, y){
      Q.beta <- qbeta(qk, exp(theta[1]), exp(theta[2]))
      sum(dnorm(y,
                Q.beta,
                sigma, log = T))
    }

    # Estimate Shape Paramters
    est_shapes_i <- function(y){
      res <- optim(log(c(1,1)), log.lik.i, y=y,
                   control =list("fnscale"=-1))
      exp(res$par)
    }


    # Prepare Objects for estimation
    res.mat <- matrix(NA, nrow=N, ncol=2)

    # Only estimate for respondent where constraint holds
    sel <- quart.mat[1] < quart.mat[2] & quart.mat[2] < quart.mat[3]

    # Estimations
    res.mat[sel,] <- t(apply((quart.mat[sel,]),1,est_shapes_i))
    sigma <- est_sigma(quart.mat, res.mat)

    # Return Results
    return(list("shapes"= res.mat,
                "sigma"= sigma,
                "n" = sum(sel)
    ))
    }


# Est_Eb Interval =======

  est_eB_beta_int <- function(resp.mat,
                              cut.offs,
                              true.val =c(60,60),
                              start.vals = c(log(1),log(1),log(0.1),log(0.1)) # Starting Values
                              ){

    # Estimate elicited Beliefs with a beta distribution
    # from a matrix of interval responses (y_mean,p_low,p_high)
    # and information about cut.offs (c.low,c.high)
    # normal model for measurment
    # y_mean ~ N(mu_1,sigma_1); p_low ~ N(mu_2,sigma_2);  q75 ~ N(mu_3,sigma_2);
    # mu_1 = alpha/( alpha + beta), mu_2 = Q(c.low, alpha, beta), mu_3 = 1-Q(c.high, alpha, beta)
    # where P ist the CDF of the Beta distribution

    # v is a vector of size 3 containing lower quartile, median, and upper quartile
    if(ncol(resp.mat)!=3) stop('quartile measures (in columns) needs to be of length 3')

    # Log-liklihood Function for one respondent
    log_lik_q <- function(theta,
                          q=resp.mat,
                          null=FALSE, tv=true.val, # For Null Model
                          co=cut.offs # Cutt-offs
                          ){

      # Paramters
      if(null){ # Null Model only estimates error variance with true paramters
        alpha <- tv[1]  # True alpha shape paramter of beta distributed data
        beta  <- tv[2]  # True beta shape paramter of beta distributed data
        sigma <- exp(theta[1]) # Meaurment error standard eviation
        sigma2 <- exp(theta[2])
      } else { # Model for estimating paramters of beta
        alpha <- exp(theta[1]) # Positive alpha shape paramter of beta belief
        beta  <- exp(theta[2]) # Positive beta shape paramter of beta belief
        sigma <- exp(theta[3]) # Meaurment error standard deviation for mean
        sigma2 <- exp(theta[4]) # Measurment error standard dev. prob
      }


      # Expected value and probabilities given the paramters
      Eq <- c(alpha/(alpha + beta)
              ,pbeta(co[1],alpha,beta,lower.tail = T)
              ,pbeta(co[2],alpha,beta,lower.tail = F))

      # Log-Likihoods
      log_lik_i <- apply(q,1,function(dat){
          dnorm(dat[1], Eq[1],sigma ,log=T) +
          dnorm(dat[2], Eq[2],sigma2,log=T) +
          dnorm(dat[3], Eq[3],sigma2,log=T)
      })
      # Return
      return(sum(log_lik_i))

    }

    # Minmize Log-Liklihood
    res <- optim(par=start.vals,
                 fn=log_lik_q,
                 hessian = T, control=list(fnscale=-1),
                 lower=c(0,0,-10,-10),upper=c(10,10,5,5),
                 method = "L-BFGS-B")

    # Maximize Log-Liklihood for null model
    res_null <- optim(par=c(log(0.1),log(0.1)),
                      fn=log_lik_q, null=TRUE,
                      hessian = T,control=list(fnscale=-1),
                      lower=c(-10,-10),upper=c(5,5),
                      method = "L-BFGS-B")

    # LR-Test
    LR_asym <- 2*(res$value - res_null$value)
    lr <- pchisq(LR_asym,2,lower.tail = F)

    # Return Parameter estimates as vector
    names(res$par) <- c("alpha","beta","sigma_value","sigma_prob")
    return(list("par"= exp(res$par),
                "log_lik"= res$value,
                "log_lik_null"=res_null$value,
                "lr"=lr,
                "n" = nrow(resp.mat)))

  }

  est_eB_beta_int_i <- function(quart.mat, cut.offs, sigma = c(0.09,0.09)){

    # Estimate elicited Beliefs with a beta distribution

    # v is a vector of size 3 containing lower quartile, median, and upper quartile
    if(ncol(quart.mat)!=3) stop('quartile measures (in columns) needs to be of length 3')

    # Information
    N <- nrow(quart.mat)
    K <- 3

    # Estimate sigma
    est_sigma <- function(Y,shapes){
      Emean_i <- as.numeric(apply(shapes,1,function(s) c(s[1]/(s[1] + s[2]))))
      Eprob_i <- t(apply(shapes,1,function(s) c(
                                   pbeta(cut.offs[1],s[1],s[2],lower.tail = T)
                                  ,pbeta(cut.offs[2],s[1],s[2],lower.tail = F))))
      sig1 <- sqrt(sum((Y[,1] - Emean_i)^2)/(N))
      sig2 <- sqrt(sum((Y[,2:3] - Eprob_i)^2)/(2*N))
      return(c(sig1,sig2))
    }

    # log-Liklihood
    log.lik.i <- function(theta, y){
      alpha <- exp(theta[1]); beta <- exp(theta[2])
      mu <- c(alpha/(alpha + beta)
              ,pbeta(cut.offs[1],alpha,beta,lower.tail = T)
              ,pbeta(cut.offs[2],alpha,beta,lower.tail = F))
      #Log-lik
      sum(dnorm(y,
                mu,
                c(sigma[1],sigma[2],sigma[2]), log = T))
    }

    # Estimate Shape Paramters
    est_shapes_i <- function(y){
      res <- optim(c(1,1), log.lik.i, y=y,
                   control =list("fnscale"=-1))
      exp(res$par)
    }


     # Estimations Iterate
    while(TRUE){
      shapes <- t(apply((quart.mat),1,est_shapes_i))
      sigma_new <- est_sigma(quart.mat, shapes)
      if(sum(abs(sigma - sigma_new))<0.001){
        sigma <- sigma_new
        break()
      } else {
        sigma <- sigma_new
      }
    }

    # Return Results
    return(list("shapes"= shapes,
                "sigma"= sigma,
                "n" = N
    ))
  }

# Est_Eb Manski =======

  est_eB_beta_manski <- function(resp.mat,
                                 true.val=c(60,60),
                                 start.vals = c(log(1),log(1),log(0.1)) # Starting Values
                                  ){

    # Estimate elicited Beliefs with a beta distribution
    # from a matrix of interval responses (y_mean,y_low,p_high,p_low,p_high)
    # and information about cut.offs (c.low,c.high)
    # normal model for measurment
    # y_mean ~ N(mu_1,sigma_1);
    # y_low ~ N(mu_2,sigma_1); y_high ~ N(mu_3,sigma_1);
    # probabilities are fixed (otheriwse complicated)
    # mu_1 = alpha/( alpha + beta),
    # mu_4 = Q(p_low, alpha, beta), mu_3 = 1-Q(p_high, alpha, beta)
    # where Q ist the CDF of the Beta distribution

    # v is a vector of size % containing mean, lower_bound, upper_bound, p_lower_bound, p_upper_bound
    if(ncol(resp.mat)!=5) stop('measures (in columns) needs to be of length 5')


    # Log-liklihood Function for one respondent
    log_lik_q <- function(theta,
                          null=FALSE,tv=true.val,
                          q=resp.mat){

      # Paramters
      if(null){ # Null Model only estimates error variance with true paramters
        alpha <- tv[1]  # True alpha shape paramter of beta distributed data
        beta  <- tv[2]  # True beta shape paramter of beta distributed data
        sigma <- exp(theta[1]) # Meaurment error standard eviation

      } else {# Model for estimating paramters of beta
        alpha <- exp(theta[1]) # Positive alpha shape paramter of beta belief
        beta  <- exp(theta[2]) # Positive beta shape paramter of beta belief
        sigma <- exp(theta[3]) # Meaurment error standard deviation for mean
      }

      # Log-Liklihood
      log_lik_i <- apply(q,1,function(dat){

        # Probabilities
        prob <- dat[4:5]

        # mean, lower_bound, upper_bound
        mean <- alpha/(beta + alpha)
        lower_bound <- qbeta(as.numeric(prob[1]),alpha,beta,lower.tail = T)
        upper_bound <- qbeta(as.numeric(prob[2]),alpha,beta,lower.tail = F)

        # Log_liklehood
        (dnorm(dat[1], mean,sigma ,log=T) +
         dnorm(dat[2], lower_bound,sigma,log=T) +
         dnorm(dat[3], upper_bound,sigma,log=T))
      })

      # Return
      return(sum(log_lik_i))

    }

    # Minmize Log-Liklihood
    res <- optim(par=start.vals,
                 fn=log_lik_q,
                 hessian = T, control=list(fnscale=-1),
                 lower=c(0,0,-10),upper=c(10,10,5),
                 method = "L-BFGS-B")

    # Maximize Log-Liklihood for null model
    res_null <- optim(par=c(log(0.1)),lower = -30,upper=30,
                      fn=log_lik_q, null=TRUE,
                      control=list(fnscale=-1),
                      method = "Brent")

    # LR-Test
    LR_asym <- 2*(res$value - res_null$value)
    lr <- pchisq(LR_asym,2,lower.tail = F)

    # Return Parameter estimates as vector
    names(res$par) <- c("alpha","beta","sigma")
    return(list("par"= exp(res$par),
                "log_lik"= res$value,
                "log_lik_null"=res_null$value,
                "lr"=lr,
                "n" = nrow(resp.mat)))
  }


  est_eB_beta_manski_i <- function(quart.mat, 
                                   init.shapes = c(1,1),
                                   cut.offs, 
                                   sigma = c(0.09,0.09)){

    # Estimate elicited Beliefs with a beta distribution

    # v is a vector of size 3 containing lower quartile, median, and upper quartile
    if(ncol(quart.mat)!=5) stop('manski measures (in columns) needs to be of length 5')

    # Information
    N <- nrow(quart.mat)
    K <- 5

    # Estimate sigma
    est_sigma <- function(Y,shapes){
      Emean_i <- as.numeric(apply(shapes,1,function(s) c(s[1]/(s[1] + s[2]))))
      Eprob_i <- t(sapply(1:nrow(shapes),function(i) c(
         qbeta(as.numeric(Y[i,3]),shapes[i,1],shapes[i,2],lower.tail = T)
        ,qbeta(as.numeric(Y[i,4]),shapes[i,1],shapes[i,2],lower.tail = F))))
      sig1 <- sum((Y[sel,1] - Emean_i[sel])^2)
      sig2 <- sum((Y[sel,2:3] - Eprob_i[sel,])^2)
      return(sqrt((sig1 + sig2)/3*N))
    }

    log.lik.i <- function(theta, y){

      # mean, lower_bound, upper_bound
      mean <- exp(theta[1])/(exp(theta[1]) + exp(theta[2]))
      lower_bound <- qbeta(y[4],exp(theta[1]),exp(theta[2]),lower.tail = T)
      upper_bound <- qbeta(y[5],exp(theta[1]),exp(theta[2]),lower.tail = F)

      #Log-lik
       sum(dnorm(y[1:3],
             mean=c(mean,lower_bound,upper_bound),
             sd=c(sigma) ,log=T))
    }

    # Estimate Shape Paramters
    est_shapes_i <- function(dat){
      res <- optim(log(init.shapes), log.lik.i, y=dat,
                   control =list("fnscale"=-1))
      exp(res$par)
    }


    # Contraint lower.p and upper.p <1
    sel <- (quart.mat[,4] + quart.mat[,5] < 1) &
            quart.mat[,2] < quart.mat[,3] &
            quart.mat[,4] != 0 &
            quart.mat[,5] != 0 
    shapes <- matrix(NA, nrow = N, ncol=2)

    # Estimations Iterate
    while(TRUE){
      shapes[sel,] <- t(apply((quart.mat[sel,]),1,est_shapes_i))
      sigma_new <- est_sigma(quart.mat, shapes)
      if(sum(abs(sigma - sigma_new))<0.001){
        sigma <- sigma_new
        break()
      } else {
        sigma <- sigma_new
      }
    }

    # Return Results
    return(list("shapes"= shapes,
                "sigma"= sigma,
                "n" = N
    ))
  }

# Est_bins Bins and Balls =======

  est_eB_beta_binsballs <- function(resp.mat,
                                 true.val=c(60,60),
                                 c.p=(c(20,25,30,35,40,45,50,55,60,65,70,75,80)/100),
                                 start.vals = c(log(1),log(1)) # Starting Values
  ){

    # Estimate elicited Beliefs with a beta distribution


    # v is a vector of size % containing mean, lower_bound, upper_bound, p_lower_bound, p_upper_bound
    if(ncol(resp.mat)!=12) stop('measures (in columns) needs to be of length 12')


    # Log-liklihood Function for one respondent
    log_lik_q <- function(theta,
                          cuts=c.p,
                          q=resp.mat){

      # Paramters
        alpha <- exp(theta[1]) # Positive alpha shape paramter of beta belief
        beta  <- exp(theta[2]) # Positive beta shape paramter of beta belief

      # Log-Liklihood
        log_lik_i <- apply(q,1,function(y){

          # Calcaulcate probabaility
          es <- NULL

          for(c in 1:12){
            es[c] <- pbeta(cuts[(c+1)],alpha,beta) - pbeta(cuts[(c)],alpha,beta)
          }

          # Log_liklehood Binomial
          lik <- sum(sapply(1:12, function(i) dbinom(as.numeric(y[i]),100,es[i],log = T)))

          lik[is.infinite(lik)] <- -2763.104

          return(lik)


        })

        # Return
        return(sum(log_lik_i))

        }

    # Minmize Log-Liklihood
      res <- optim(par=start.vals,
                   fn=log_lik_q,
                   hessian = T, control=list(fnscale=-1),
                   lower=c(0,0),upper=c(5,5),
                   method = "L-BFGS-B")

    # Maximize Log-Liklihood for null model
    res_null_lik <- log_lik_q(log(true.val))

    # LR-Test
    LR_asym <- 2*( res$value - res_null_lik)
    lr <- pchisq(LR_asym,2,lower.tail = F)

    # Return Parameter estimates as vector
    names(res$par) <- c("alpha","beta")
    return(list("par"= exp(res$par),
                "log_lik"= res$value,
                "log_lik_null"=res_null_lik,
                "lr"=lr,
                "n" = nrow(resp.mat)))
  }


  est_eB_beta_binsballs_i <- function(quart.mat, 
                                   init.shapes = c(1,1),
                                   cuts=(c(20,25,30,35,40,45,50,55,60,65,70,75,80)/100),
                                   sigma = c(0.09,0.09)){
    
    # Estimate elicited Beliefs with a beta distribution
    
    # v is a vector of size 3 containing lower quartile, median, and upper quartile
    if(ncol(quart.mat)!=12) stop('bins and balls measures (in columns) needs to be of length 12')
    
    # Information
    N <- nrow(quart.mat)

    # Liklihood i
    log_lik_i <- function(theta,y){
      
      alpha <- exp(theta[1]); beta <- exp(theta[2])
      
      # Calcaulcate probabaility
      es <- NULL
      
      for(c in 1:12){
        es[c] <- pbeta(cuts[(c+1)],alpha,beta) - pbeta(cuts[(c)],alpha,beta)
      }
      
      # Log_liklehood Binomial
      lik <- sum(sapply(1:12, function(i) dbinom(as.numeric(y[i]),100,es[i],log = T)))
      
      lik[is.infinite(lik)] <- -2763.104
      
      return(lik)
     }
  
  
    # Estimate Shape Paramters
    est_shapes_i <- function(dat){
      res <- optim(log(init.shapes), log_lik_i, y=dat,
                   control =list("fnscale"=-1))
      exp(res$par)}

    
    # Estimations Iterate
    sel <- !apply(quart.mat,1,sum)!=100
    shapes <- matrix(NA, nrow = N, ncol=2)
    shapes[sel,] <- t(apply((quart.mat[sel,]),1,est_shapes_i))
 
    
    # Return Results
    return(list("shapes"= shapes,
                "n" = N
    ))
  }
